library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.2
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## Warning: package 'purrr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.2
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the tab-separated file "23andMe_complete.txt"; its first row holds the
# column names.
SNPs <- read.table(
  file = "23andMe_complete.txt",
  header = TRUE,
  sep = "\t"
)

# Exercise 1: bar chart counting the rows (SNPs) found on each chromosome.
ggplot(SNPs, aes(x = chromosome)) +
  geom_bar(fill = "blue") +
  labs(
    title = "Total number of SNPs per Chromosome",
    x = "Chromosome Number",
    y = "Total number of SNPs"
  )

# Exercise 2: stacked bar chart of SNP counts per chromosome, filled by
# genotype.

# Fill colour assigned to every genotype code, one row per colour group:
#   orange  - two-letter codes made of two different bases
#   blue    - two-letter codes made of the same base twice
#   green   - single-base codes
#   magenta - D / I codes
#   grey    - "--"
mycolour <- c(
  "AC" = "#E69F00", "AG" = "#E69F00", "AT" = "#E69F00",
  "CG" = "#E69F00", "CT" = "#E69F00", "GT" = "#E69F00",
  "AA" = "#56B4E9", "CC" = "#56B4E9", "GG" = "#56B4E9", "TT" = "#56B4E9",
  "A" = "green", "C" = "green", "G" = "green", "T" = "green",
  "D" = "magenta", "DD" = "magenta", "DI" = "magenta",
  "I" = "magenta", "II" = "magenta",
  "--" = "#999999"
)

# Fix the chromosome order to 1-22, X, Y, MT so the x-axis is not sorted
# alphabetically.
SNPs$chromosome <- ordered(
  SNPs$chromosome,
  levels = c(seq(1, 22), "X", "Y", "MT")
)

# Refer to the column by its bare name: aes() is evaluated against the plot
# data, whereas `SNPs$chromosome` would bypass it and read the global object.
p <- ggplot(SNPs, aes(x = chromosome, fill = genotype)) +
  geom_bar(color = "black") +
  ggtitle("Contribution of each Genotype to the Chromosome Count") +
  ylab("Total number of SNPs") +
  xlab("Chromosome Number")

# Print the plot with the custom genotype palette applied.
p + scale_fill_manual(values = mycolour)

#Exercise 3: Genotypes Measured For Each Chromosome

# Exercise 4: one bar-chart panel per genotype, showing SNP counts per
# chromosome.

# Fix the chromosome order to 1-22, X, Y, MT. Re-applying the same levels is
# harmless if the column is already ordered, and keeps this chunk runnable on
# its own.
SNPs$chromosome <- ordered(
  SNPs$chromosome,
  levels = c(seq(1, 22), "X", "Y", "MT")
)

# Use the bare column name in aes(): with facet_wrap() the layer data is split
# per panel, and an external vector such as `SNPs$chromosome` is not guaranteed
# to stay aligned with it.
ggplot(data = SNPs) +
  geom_bar(
    mapping = aes(x = chromosome, fill = genotype),
    position = "dodge"
  ) +
  facet_wrap(~ genotype, ncol = 3) +
  ggtitle("Total number of SNPs per Chromosome Organized by Genotype") +
  ylab("Total number of SNPs") +
  xlab("Chromosome Number")

#Exercise 5
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Fix the chromosome order to 1-22, X, Y, MT. Re-applying the same levels is
# harmless if the column is already ordered.
SNPs$chromosome <- ordered(
  SNPs$chromosome,
  levels = c(seq(1, 22), "X", "Y", "MT")
)

# Faceted bar chart of SNP counts per chromosome, one panel per genotype.
# The bare column name is used in aes() so the axis title and the plotly
# tooltips read "chromosome" rather than "SNPs$chromosome", and so the mapping
# stays aligned with the faceted data.
p <- ggplot(data = SNPs) +
  geom_bar(
    mapping = aes(x = chromosome, fill = genotype),
    position = "dodge"
  ) +
  facet_wrap(~ genotype, ncol = 2)

# Convert the static ggplot object into an interactive plotly widget.
ggplotly(p)
#Exercise 6
library(DT)
## Warning: package 'DT' was built under R version 3.6.2
# Keep only the rows whose chromosome value is "Y".
y_chrom <- filter(SNPs, chromosome == "Y")

# Show the filtered rows as an interactive DT table.
datatable(data = y_chrom)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html